There are 231 subjects with patients’ age and the amount of exercise expressed as estimated hours per week with 2 groups (control and patient).

data("Blackmore")
skimr::skim(Blackmore)
Data summary
Name Blackmore
Number of rows 945
Number of columns 4
_______________________
Column type frequency:
factor 2
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
subject 0 1 FALSE 231 100: 5, 101: 5, 105: 5, 106: 5
group 0 1 FALSE 2 pat: 586, con: 359

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 11.44 2.77 8 10.0 12.00 14.00 17.92 ▇▇▇▆▃
exercise 0 1 2.53 3.50 0 0.4 1.33 3.04 29.96 ▇▁▁▁▁
control <- Blackmore[Blackmore$group == "control",]
skimr::skim(control)
Data summary
Name control
Number of rows 359
Number of columns 4
_______________________
Column type frequency:
factor 2
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
subject 0 1 FALSE 93 200: 5, 205: 5, 207: 5, 212: 5
group 0 1 FALSE 1 con: 359, pat: 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 11.26 2.70 8 8.00 10.00 13.50 17.92 ▇▇▇▅▂
exercise 0 1 1.64 1.81 0 0.43 1.05 2.15 11.54 ▇▂▁▁▁
length(unique(control$subject))
## [1] 93
Blackmore$log.exercise <- log(Blackmore$exercise + 5/60, 2)
attach(Blackmore)
con <- sample(unique(subject[group == "control"]), 20)
con.20 <- Blackmore[is.element(subject, con),]
ggplot(con.20, aes(I(age-8), log.exercise)) + geom_point() + facet_wrap(.~subject)

pat <- sample(unique(subject[group == "control"]), 20)
pat.20 <- Blackmore[is.element(subject, pat),]
ggplot(pat.20, aes(I(age-8), log.exercise)) + 
  geom_point() + facet_wrap(.~subject) +
  geom_smooth(method = "lm", se = FALSE)

detach(Blackmore)
summary(I(Blackmore$age - 8))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   4.000   3.442   6.000   9.920

Model

fit <- lmer(log.exercise ~ I(age-8)*group + (1 |subject), data = Blackmore)

summary(fit)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.exercise ~ I(age - 8) * group + (1 | subject)
##    Data: Blackmore
## 
## REML criterion at convergence: 3632.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2142 -0.4735  0.1269  0.5643  2.7214 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 1.875    1.369   
##  Residual             1.807    1.344   
## Number of obs: 945, groups:  subject, 231
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)             -0.28322    0.18066  -1.568
## I(age - 8)               0.06901    0.02717   2.540
## grouppatient            -0.33354    0.23306  -1.431
## I(age - 8):grouppatient  0.22816    0.03387   6.737
## 
## Correlation of Fixed Effects:
##             (Intr) I(g-8) grpptn
## I(age - 8)  -0.471              
## grouppatint -0.775  0.365       
## I(g-8):grpp  0.378 -0.802 -0.473

vcov is the covariance matrix of the fixed-effect estimates which is consistent with the mathmatical expression \(V(\hat \beta) = (X^\top \Omega^{-1} X)^{-1}\) (solve(t(X) %*% Sigma_inv %*% X)).

N <- nrow(Blackmore)
subjects <- unique(Blackmore$subject)

y <- Blackmore$log.exercise
X <- model.matrix(~ I(age-8)*group, data = Blackmore)
beta <- fixef(fit)
Z <- model.matrix(~ subject-1, data = Blackmore)
u <- ranef(fit)$subject[,1]
q <- length(u)

vc <- as.data.frame(VarCorr(fit))
R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
G <- vc$vcov[vc$grp == 'subject'] * diag(q)

Sigma <- Z %*% G %*% t(Z) + R 
Sigma_inv <- solve(Sigma)

P <- Sigma_inv - Sigma_inv %*% X %*% vcov(fit) %*% t(X) %*% Sigma_inv

varMargRes <- Sigma - X %*% vcov(fit) %*% t(X) 
varCondRes <- R %*% P %*% R  
varU <- G - G %*% t(Z) %*% P %*% Z %*% G

fit_bl <- Blackmore %>% 
  mutate(fitted = as.vector(X %*% beta),
         resm = y - fitted,
         resc = y - fitted - Z %*% u,
         index = 1:n()) %>% 
  rowwise() %>% 
  mutate(resm_std = resm/sqrt(diag(varMargRes)[index]),
         resc_std = resc/sqrt(diag(varCondRes)[index])) %>% 
  ungroup()

unit_bl <- expand_grid(subjects) %>% 
  mutate(Vi = map_dbl(subjects, ~{
    ind <- Blackmore %>% 
      mutate(index = 1:n()) %>% 
      filter(subject == .x) %>% 
      pull(index)
    mi <- length(ind)
    Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% fit_bl$resm[ind]
    sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
  }),
  Mi = as.vector(t(u) %*% solve(varU) %*% u),
  index = 1:n())

Diagnostic Plot 1: Linearity of fixed effects

ggplot(fit_bl, aes(fitted, resm_std)) +
  geom_hline(yintercept = 0) +
  geom_point()

Diagnostic Plot 2: Prescence of outlying observations

ggplot(fit_bl, aes(index, resm_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 3: Within-units covariance matrix

ggplot(unit_bl, aes(index, Vi)) + geom_point()+ xlab("Unit indicies") + ylab("Modified Lesaffre-Verbeke index")

Diagnostic Plot 4: Prescense of outlying observations using conditional residual

ggplot(fit_bl, aes(index, resc_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 5: Homoskedasticity of conditional errors

ggplot(fit_bl, aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0)

Diagnostic Plot 6: Normality of conditional errors

ggplot(fit_bl, aes(sample = resc_std)) + 
  geom_qq() + geom_qq_line(color = "red")

Diagnostic Plot 7: Presencse of outlying subjects

ggplot(unit_bl, aes(index, Mi)) + geom_point() + xlab("Unit index") + ylab("Manhalanobis distance")

Diagnostic Plot 8: Normality of the random effects

ggplot(unit_bl, aes(sample = Mi)) + 
  geom_qq(distribution = stats::qchisq,
          dparams = list(df = q)) +
  geom_qq_line(color = "red",
               distribution = stats::qchisq,
               dparams = list(df = q))

Lineup Protocol

lineup(null_permute("resm_std"), fit_bl) %>% 
  ggplot(aes(fitted, resm_std)) + 
  geom_point() +
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

lineup(null_permute("resm_std"), fit_bl) %>% 
  ggplot(aes(index, resm_std)) + 
  geom_point() +
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

lineup(null_permute("Vi"), unit_bl) %>% 
ggplot(aes(index, Vi)) + 
  geom_point() +
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

lineup(null_permute("resc_std"), fit_bl) %>% 
  ggplot(aes(index, resc_std)) +
  geom_point() + 
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

lineup(null_permute("resc_std"), fit_bl) %>% 
  ggplot(aes(fitted, resc_std)) +
  geom_point() + 
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

Fail in Normality of conditional errors

lineup(null_permute("resc_std"), fit_bl) %>% 
  ggplot(aes(sample = resc_std)) + 
  geom_qq() +
  geom_qq_line(color = "red") + 
  facet_wrap(~.sample) + 
  xlab(NULL) + 
  ylab(NULL) +
  theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), axis.ticks.y = element_blank())

Since the \(M_i\) are the same, there is no need to do that

lineup(null_permute("Mi"), unit_bl) %>% 
  ggplot(aes(index, Mi))+
  geom_point()+
  facet_wrap(~.sample)

lineup(null_permute("Mi"), unit_bl) %>% 
  ggplot(aes(sample = Mi)) + 
  geom_qq(distribution = stats::qchisq,
          dparams = list(df = q)) +
  geom_qq_line(color = "red",
               distribution = stats::qchisq,
               dparams = list(df = q)) +
  facet_wrap(~.sample)

d <- lineup(null_permute("log.exercise"), Blackmore)
simdat <- map_dfr(1:20, function(i){
  df <- d %>% filter(.sample == i)
  m <- fit <- lmer(log.exercise ~ I(age-8)*group + (1 |subject), data = df)
  
  N <- nrow(df)
  subjects <- unique(df$subject)

  y <- df$log.exercise
  X <- model.matrix(~ I(age-8)*group, data = df)
  beta <- fixef(m)
  Z <- model.matrix(~ subject-1, data = df)
  u <- ranef(m)$subject[,1]
  q <- length(u)

  vc <- as.data.frame(VarCorr(m))
  R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
  G <- vc$vcov[vc$grp == 'subject'] * diag(q)

  Sigma <- Z %*% G %*% t(Z) + R 
  Sigma_inv <- solve(Sigma)

  P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv

  varMargRes <- Sigma - X %*% vcov(m) %*% t(X) 
  varCondRes <- R %*% P %*% R  
  varU <- G - G %*% t(Z) %*% P %*% Z %*% G
  
  fitted = as.vector(X %*% beta)
  resm = y - fitted
  resc = y - fitted - Z %*% u
  index = 1:nrow(df)
  
  resm_std = resm / sqrt(diag(varMargRes)[index])
  resc_std = resc / sqrt(diag(varCondRes)[index])
  
  tibble::tibble(df, fitted, resm_std, resc_std, index)
})